#clean up
rm(list=ls())

#set seed
set.seed(715130425)

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(car)
library(xtable)
library(arm)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/jamie/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

#load data
combined<-read.dta('cces08reformatted.dta')

#load forecast covariates
forecasters<-read.csv('krigedPoints.csv')

#subset to Virginia districts
state.F<-forecasters
va.F<-subset(forecasters,!is.na(cd))

###Fit multilevel regression model for districts###
dist.mlm<-lmer(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+(1|state)+(1|fipsCD), data=combined)
summary(dist.mlm)

#save random intercepts vector
state.int<-ranef(dist.mlm)$state
state.int.va<-state.int["VA",]
dist.int<-ranef(dist.mlm)$fipsCD
dist.int.va<-dist.int[c('5101','5102','5103','5104','5105','5106','5107','5108','5109','5110','5111'),]

#outputs
va.f.means<-rep(NA,dim(va.F)[1])

#make forecast for every observation
for (i in 1:dim(va.F)[1]){
	#define covariate vector from draw
	new.cov<-c(1,
	va.F[i,'age'],
	va.F[i,'educ'],
	as.numeric(va.F[i,'race']==2),
	as.numeric(va.F[i,'race']==3),
	va.F[i,'female'],
	as.numeric(va.F[i,'ftotinc']==2),
	as.numeric(va.F[i,'ftotinc']==3),
	as.numeric(va.F[i,'ftotinc']==4),
	as.numeric(va.F[i,'ftotinc']==5),
	as.numeric(va.F[i,'ftotinc']==6),
	as.numeric(va.F[i,'ftotinc']==7),
	as.numeric(va.F[i,'ftotinc']==8),
	as.numeric(va.F[i,'ftotinc']==9),
	as.numeric(va.F[i,'ftotinc']==10),
	as.numeric(va.F[i,'ftotinc']==11),
	as.numeric(va.F[i,'ftotinc']==12),
	as.numeric(va.F[i,'ftotinc']==13),
	as.numeric(va.F[i,'ftotinc']==14),
	va.F[i,'cath'],
	va.F[i,'mormon'],
	va.F[i,'ortho'],
	va.F[i,'jewish'],
	va.F[i,'islam'],
	va.F[i,'main'],
	va.F[i,'evan'],
	va.F[i,'rural'],
	va.F[i,'ownershp'],
	as.numeric(va.F[i,'empstat']==2),
	as.numeric(va.F[i,'empstat']==3),
	va.F[i,'age']* va.F[i,'educ'],
	as.numeric(va.F[i,'race']==2)*va.F[i,'female'],
	as.numeric(va.F[i,'race']==3)*va.F[i,'female'])

	#create mean forecast
	va.f.means[i]<-new.cov%*%dist.mlm@fixef+state.int.va+dist.int.va[va.F$cd[i]]
}

#Aggregate by state
va.forecast.matrix<-cbind(va.f.means,va.F$cd)
mrp.cd<-as.numeric(by(va.forecast.matrix[,1],va.forecast.matrix[,2],mean))
mrp.cd.between<-as.numeric(by(va.forecast.matrix[,1],va.forecast.matrix[,2],var))
mrp.cd.within<-sigma.hat(dist.mlm)$sigma$data^2
no.cd.kriges<-table(va.F$cd)
mrp.cd.var<-mrp.cd.within+((no.cd.kriges+1)/no.cd.kriges)*mrp.cd.between

#write out
write.csv(cbind(mrp.cd,mrp.cd.var),'mrpVAdist.csv')


###Fit multilevel regression model for states###
ideol.mlm<-lmer(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+(1|state), data=combined)
summary(ideol.mlm)

#save random intercepts vector
ran.int<-ideol.mlm@ranef
names(ran.int)<-c("AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY")

#outputs
f.means<-rep(NA,dim(state.F)[1])

#make forecast for every observation
for (i in 1:dim(state.F)[1]){
	#define covariate vector from draw
	new.cov<-c(1,
	state.F[i,'age'],
	state.F[i,'educ'],
	as.numeric(state.F[i,'race']==2),
	as.numeric(state.F[i,'race']==3),
	state.F[i,'female'],
	as.numeric(state.F[i,'ftotinc']==2),
	as.numeric(state.F[i,'ftotinc']==3),
	as.numeric(state.F[i,'ftotinc']==4),
	as.numeric(state.F[i,'ftotinc']==5),
	as.numeric(state.F[i,'ftotinc']==6),
	as.numeric(state.F[i,'ftotinc']==7),
	as.numeric(state.F[i,'ftotinc']==8),
	as.numeric(state.F[i,'ftotinc']==9),
	as.numeric(state.F[i,'ftotinc']==10),
	as.numeric(state.F[i,'ftotinc']==11),
	as.numeric(state.F[i,'ftotinc']==12),
	as.numeric(state.F[i,'ftotinc']==13),
	as.numeric(state.F[i,'ftotinc']==14),
	state.F[i,'cath'],
	state.F[i,'mormon'],
	state.F[i,'ortho'],
	state.F[i,'jewish'],
	state.F[i,'islam'],
	state.F[i,'main'],
	state.F[i,'evan'],
	state.F[i,'rural'],
	state.F[i,'ownershp'],
	as.numeric(state.F[i,'empstat']==2),
	as.numeric(state.F[i,'empstat']==3),
	state.F[i,'age']* state.F[i,'educ'],
	as.numeric(state.F[i,'race']==2)*state.F[i,'female'],
	as.numeric(state.F[i,'race']==3)*state.F[i,'female'])

	#create mean forecast
	f.means[i]<-new.cov%*%ideol.mlm@fixef+ran.int[paste(state.F$state[i])]
}

#Aggregate by state
forecast.matrix<-cbind(f.means,state.F$state)
mrp.state<-as.numeric(by(forecast.matrix[,1],forecast.matrix[,2],mean))
mrp.between<-as.numeric(by(forecast.matrix[,1],forecast.matrix[,2],var))
mrp.within<-sigma.hat(ideol.mlm)$sigma$data^2
no.kriges<-table(state.F$state)
mrp.state.var<-mrp.within+((no.kriges+1)/no.kriges)*mrp.between

#write out
write.csv(cbind(mrp.state,mrp.state.var),'mrpState.csv')

###Subsetting###
#obtain simple state averages
stateSubsetting<-aggregate(x=combined$ideology, by=list(state=combined$state), FUN=mean, na.rm=T)
colnames(stateSubsetting)[2]<-"unweighted"

#obtain weighted state averages
stateSubsetting$weighted<-sapply(split(combined,combined$state),function(x){weighted.mean(x$ideology,w=x$weight)})[-c(1,12,40)]

#write-out estimates
write.csv(stateSubsetting, 'stateSubsetting.csv',row.names=F)

##obtain simple district averages##
#identify locations of Virginia respondents
va.cces<-subset(combined,state=="VA")

#place coordinates
#pol.no<-va.points%over%va.cd.2
#cd<-recode(pol.no,'1=6;2=9;3=10;4=1;5=11;6=5;7=7;8=8;9=4;10=3;11=2')

#obtain simple district averages
distSubsetting<-aggregate(x=va.cces$ideology, by=list(CD=va.cces$cd), FUN=mean, na.rm=T)
colnames(distSubsetting)[2]<-"unweighted"

#obtain weighted district averages
distSubsetting$weighted<-sapply(split(va.cces,va.cces$cd),function(x){weighted.mean(x$ideology,w=x$weight)})

#write-out estimates
write.csv(distSubsetting, 'distSubsetting.csv',row.names=F)
